#delimit ;
set more off;
** REPLACE FILE PATH WITH PATH TO RELEVANT REPLICATION FILES;
local fileloc = "~/KMS_REPLICATION";
cap log close;
log using `fileloc'/log_files/KMS_montecarlo_work.txt, replace;
clear all;
pause on;


** Note: This do-file reproduces the data for Monte Carlo figures;
** Run regression prep;
qui do `fileloc'/all_regression_prep_hazard_collapsed.do `fileloc' _all;

**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;
**XXXXXXXXXXXXXXXXXXXXXX REGS XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;
**XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX;

quietly {;

	** Make coefficients easier to read;
	replace died = died * 1000;

	** Note the dividing – done so Stata has no problem with standard errors;
	foreach weather in max_temp windspeed humidS {;
		gen double `weather'sq = `weather'^2;
		gen double `weather'cu = `weather'^3/1000;
	};

	foreach weather in rain {;
		gen double `weather'sq = `weather'^2;
		gen double `weather'cu = `weather'^3;
	};

	** Instrumentation involves both total flow AND weather interactions – this code generates said interactions;
	foreach weathervar in $weather_linear $weather_quadratic $weather_cubic {;
		gen double tot_flowIV_`weathervar' = tot_flow_base*`weathervar';
	};

	egen maxtraffic = max(tot_flow_base), by(mother_zip);
	drop if maxtraffic == 0;

	egen zip_rmonth = group(mother_zip event_month);

	egen month_by_year = group(event_month event_year);

	xi i.birth_quarter i.month_by_year;

	save `fileloc'/simulation/KMS_mc_data.dta, replace;

	** Now the data for generating predicted bias;
	use `fileloc'/simulation/togenerate_bias_inreal_data.dta, clear;

	** And regressions to get coefficients;
	reg diff_minus_FE lagged_resid pmspline* distspline*;

	use `fileloc'/simulation/KMS_mc_data.dta, clear;
	** Generate spline for predicted values;
	mkspline pmspline1 150 pmspline2 300 pmspline3 450 pmspline4 600 pmspline5 750 pmspline6 = weekly_pm10;
	** Generate spline for distance;
	mkspline distspline1 3 distspline2 6 distspline3 9 distspline4 12 distspline5 15 distspline6 = min_dist;

	** predict bias from earlier data;
	gen lagged_resid = 0;
	predict pred_bias;
	preserve;

	** Now data for generating predicted bias;
	use `fileloc'/simulation/togenerate_bias_inreal_data.dta, clear;
	** And regressions to get coefficients – variance;
	reg SUE lagged_SUE pmspline* distspline*;
	restore;

	** Predict variance;
	gen lagged_SUE = 0;
	predict pred_var;

	** Add on coefficients;
	append using `fileloc'/simulation/error_spline_coefs.dta;
	gen lagged_resid_coef = estimate if parm == "lagged_resid";
	egen maxlagged_resid = max(lagged_resid_coef);
	replace lagged_resid_coef = maxlagged_resid;
	drop maxlagged_resid;
	drop if parm ~= "";

	append using `fileloc'/simulation/squared_error_spline_coefs.dta;
	gen lagged_SUE_coef = estimate if parm == "lagged_SUE";
	egen maxlagged_SUE = max(lagged_SUE_coef);
	replace lagged_SUE_coef = maxlagged_SUE;
	drop maxlagged_SUE;
	drop if parm ~= "";

	/* prepare to run monte carlo */;
	local postlist = "b se F_stat alt_t" ;

	cap postclose mcoutput;

	qui postfile mcoutput `postlist' using `fileloc'/simulation/KMS_mcresults.dta, every(5) replace ;


	** MAIN ESTIMATE FOR BASELINE;
	xtset zip_rmonth;
	xtivreg2 died (weekly_pm10 = tot_flow_base tot_flowIV_*) $triweekly_pm10  $weather1 $weather2 $controls $agespline _I* [fw = weight], ffirst  nocollin fe cluster(mother_zip);

	local b = _b[weekly_pm10] ;
	local b_real = _b[weekly_pm10];
	local se = _se[weekly_pm10] ;
	cap matrix drop F;
	matrix F = e(first);
	estadd scalar F_stat = F[7,1];
	local F = F[7,1];
	post mcoutput (`b') (`se') (`F') (.);

	set seed 2010;

	preserve;
	tempfile baseline;
	drop pred_bias pred_var lagged_resid_coef lagged_SUE_coef;
	noisily compress;
	sort mother_zip merging_week;
	save `baseline';
	restore;

	tempfile bias;
	bysort mother_zip merging_week: keep if _n == 1;
	gen error = pred_bias;
	keep mother_zip merging_week error pred_bias pred_var lagged_resid_coef lagged_SUE_coef;
	save `bias';

	/* main loop – 100 regressions */;
	forvalues mc = 1/100 { ;
		use `bias', clear;
		tsset mother_zip merging_week;
		sort mother_zip merging_week;
	
		** Generate random component, with correlated error;
		by mother_zip: replace error = pred_bias + L.error*lagged_resid_coef + rnormal()*sqrt(max(0,pred_var + L.error*L.error*lagged_SUE_coef)) if _n > 1;
	
		keep mother_zip merging_week error;
	
		joinby mother_zip merging_week using `baseline', unmatched(both);	
		tab _merge;
		keep if _merge == 3;
		drop _merge;

		gen weekly_pm10_noise = weekly_pm10 + error;
		local i = `mc';
		noisily di `i';
	
		xtset zip_rmonth;
		xtivreg2 died (weekly_pm10_noise = tot_flow_base tot_flowIV_*) $triweekly_pm10  $weather1 $weather2 $controls $agespline _I* [fw = weight], ffirst nocollin fe cluster(mother_zip);

		local b = _b[weekly_pm10_noise] ;
		noisily di `b';
		local se = _se[weekly_pm10_noise] ;
		local alt_beta_test = (_b[weekly_pm10_noise] - `b_real')/_se[weekly_pm10_noise];
		cap matrix drop F;
		matrix F = e(first);
		estadd scalar F_stat = F[7,1];
		local F = F[7,1];
		/* save the results */
		post mcoutput (`b') (`se') (`F') (`alt_beta_test');
	
	} ;

};

/* take the whole package of results and save them */;
postclose mcoutput ;

use `fileloc'/simulation/KMS_mcresults.dta, clear;

sum b if alt_t == .;
local main = r(mean);

twoway kdensity b,
	xline(`main')
	ytitle("Density")
	xtitle("Estimated Beta")
	$graph_options;
graph export `fileloc'/graphs/MC_betas.eps, replace;

twoway kdensity F_stat if alt_t ~= .,
	xline(`mean')
	ytitle("Density")
	xtitle("Estimated First Stage F-Stat")
	$graph_options;
graph export `fileloc'/graphs/MC_Fstat.eps, replace;

log close;